DEGs Visualization

Loading the necessary packages

pacman::p_load(tidyverse, DESeq2, ggVennDiagram, UpSetR, plotly, ggrepel, scico, apeglm)

Loading the data

# DEG object
dds1 <- readRDS("./results/deseq2_regions_dds.rds")


# The list of DEG results
res<-readRDS("./results/deseq2_regions_results_local.rds")
summary(res)
##         Length Class        Mode
## central 6      DESeqResults S4  
## south   6      DESeqResults S4

Loading the Annotations

This annotation file contains all P. cultripes transcripts by rows. As columns, we can find: * The gene IDs for P. cultripes, followed by the transcripts IDs and the peptides IDs (gene_id, transcript_id, peptide_id) * The IDs and descriptions of X. tropicalis annotated proteome resulting from both nucleotide and peptide blasting against P. cultripes transcripts (xenx_pep_id, xenx_gene_symbol, xenx_description, xenp_pep_id, xenp_gene_symbol, xenp_description).

# The annotation file
xtrop<-read.csv("./xtr109/diamondblast109.csv", stringsAsFactors = FALSE)

Extracting Significant DEGs

Extract DEGs lists in order to visualize overlapping regulated genes in the comparison of interest.

# List of DEGs

# Creating a function to extract the lists of genes repeatedly: all DEGs, up-regulated DEGs and down-regulated DEGs.

extract_degs<-function(x) {
  return(
    x %>%
      as_tibble(rownames = "gene") %>%
      filter(padj<0.05) %>%
      pull(gene)
  )
}

extract_up<-function(x) {
  return(
    x %>%
      as_tibble(rownames = "gene") %>%
      filter(padj<0.05) %>%
      filter(log2FoldChange>0) %>%
      pull(gene)
  )
}

extract_down<-function(x) {
  return(
    x %>%
      as_tibble(rownames = "gene") %>%
      filter(padj<0.05) %>%
      filter(log2FoldChange<0) %>%
      pull(gene)
  )
}

# Extracting all differentially expressed genes from all the comparisons stored in the list of DESeqResults.

sig_degs<-lapply(res, FUN=extract_degs)
str(sig_degs)
## List of 2
##  $ central: chr [1:93] "PECUL23A000127" "PECUL23A000194" "PECUL23A000652" "PECUL23A001362" ...
##  $ south  : chr [1:983] "PECUL23A000011" "PECUL23A000059" "PECUL23A000253" "PECUL23A000300" ...
up_degs<-lapply(res, FUN=extract_up)
down_degs<-lapply(res, FUN=extract_down)

Venn Diagram

library(ggvenn)
## Loading required package: grid
sig_degs_set <- list(Central=sig_degs$central,
                  South=sig_degs$south)

venn_palette <- c("dodgerblue","darkblue")

ggvenn(sig_degs_set, columns = c("Central","South"),stroke_size = 0.3,
       fill_color = venn_palette, stroke_color="black", show_percentage=F,
       fill_alpha=0.6, set_name_color = "black", text_size=6)

Upset Plot

# Plot Upset

upset(fromList(sig_degs),
      nsets = length(sig_degs),
      keep.order = T,
      nintersects = 100,
      number.angles = 0, point.size = 3, line.size = 1,
      sets.x.label = "Number of DEGs",
      set_size.show = TRUE,
      set_size.scale_max = max(sapply(sig_degs, length))+200, 
      text.scale = c(1.2, 1.2, 1.2, 1.2, 1.5, 1.5),
      sets.bar.color = c("dodgerblue","darkblue"),
      order.by=c("degree","freq"))

Volcano plot

Showing the log fold change plotted against the -log10() transformed adjusted p-values per gene.

Central Volcano Plot

genes_to_label <- c("mcl1", "mmp9.1")

central_data <- res$central %>%
  as_tibble(rownames = "gene_id") %>%
  drop_na(padj) %>% # drop all genes with NAs
  filter(padj < 0.5) %>% # reduce the number of points that need to be plotted
  mutate(
    category = case_when(
      padj < 0.05 & log2FoldChange > 0 ~ "Upregulated",
      padj < 0.05 & log2FoldChange < 0 ~ "Downregulated",
      TRUE ~ "Non-significant"
    )
  ) %>%
  left_join(xtrop, by = c("gene_id")) # add annotations

# Calculate the number of upregulated and downregulated genes
n_upregulated <- sum(central_data$category == "Upregulated")
n_downregulated <- sum(central_data$category == "Downregulated")

# Create the plot
gg_central <- central_data %>%
  ggplot(aes(
    x = log2FoldChange, y = -log10(padj), color = category,
    text = paste0("</br>Pcu23 gene: ", gene_id,
                  "</br>X.tr peptide: ", xenp_gene_symbol,
                  "</br>X.tr description: ", xenp_description)
  )) +
  geom_point(alpha = 0.75, shape = 16) +
  scale_color_manual(values = c(
    "Non-significant" = "#808080",
    "Upregulated" = "#FF5733",
    "Downregulated" = "#0F8CBA"
  )) +
  geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "#555555") +
  geom_vline(xintercept = 0, linetype = "dashed", color = "#555555") +
  annotate("text", x = 5, y = max(-log10(central_data$padj), na.rm = TRUE) - 0.5, 
           label = paste0("Upregulated: ", n_upregulated), color = "#FF5733", hjust = 1) +
  annotate("text", x = -5, y = max(-log10(central_data$padj), na.rm = TRUE) - 0.5, 
           label = paste0("Downregulated: ", n_downregulated), color = "#0F8CBA", hjust = 0) +
  scale_x_continuous(breaks = seq(-6, 6, by = 2), limits = c(-6, 6)) +
  theme_minimal() +
  theme(
    legend.position = "none",
  ) +
  geom_text_repel(
    data = subset(central_data, xenp_gene_symbol %in% genes_to_label),
    aes(label = xenp_gene_symbol),
    size = 3,
    box.padding = unit(1.7, "lines"),
    point.padding = unit(0.1, "lines"),
    color = "black",
    segment.color = 'black'
  )

gg_central
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

ggsave("central_volcanoplot.jpeg", width = 7, height = 5 , dpi = 600)
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

South Volcano Plot

genes_to_label <- c("mmp11.1", "ass1", "sec14l3", "stard8", "klf9", "hsp90aa1.1")
extra_gene <- "PECUL23A051069T1"
extra_gene_2 <- "leprotl1"


south_data <- res$south %>%
  as_tibble(rownames = "gene_id") %>%
  drop_na(padj) %>% # drop all genes with NAs
  filter(padj < 0.5) %>% # reduce the number of points that need to be plotted
  mutate(
    category = case_when(
      padj < 0.05 & log2FoldChange > 0 ~ "Upregulated",
      padj < 0.05 & log2FoldChange < 0 ~ "Downregulated",
      TRUE ~ "Non-significant"
    )
  ) %>%
  left_join(xtrop, by = c("gene_id")) # add annotations

# Calculate the number of upregulated and downregulated genes
n_upregulated <- sum(south_data$category == "Upregulated")
n_downregulated <- sum(south_data$category == "Downregulated")

# Create the plot
gg_south <- south_data %>%
  ggplot(aes(
    x = log2FoldChange, y = -log10(padj), color = category,
    text = paste0("</br>Pcu23 gene: ", gene_id,
                  "</br>X.tr peptide: ", xenp_pep_id,
                  "</br>X.tr description: ", xenp_description)
  )) +
  geom_point(alpha = 0.75, shape = 16) +
  scale_color_manual(values = c(
    "Non-significant" = "#808080",
    "Upregulated" = "#FF5733",
    "Downregulated" = "#0F8CBA"
  )) +
  geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "#555555") +
  geom_vline(xintercept = 0, linetype = "dashed", color = "#555555") +
  annotate("text", x = 5, y = max(-log10(south_data$padj), na.rm = TRUE) - 0.5, 
           label = paste0("Upregulated: ", n_upregulated), color = "#FF5733", hjust = 1) +
  annotate("text", x = -5, y = max(-log10(south_data$padj), na.rm = TRUE) - 0.5, 
           label = paste0("Downregulated: ", n_downregulated), color = "#0F8CBA", hjust = 0) +
  scale_x_continuous(breaks = seq(-6, 6, by = 2), limits = c(-6, 6)) +
  theme_minimal() +
  theme(
    legend.position = "none",
  ) +
  geom_text_repel(
    data = subset(south_data, xenp_gene_symbol %in% genes_to_label),
    aes(label = xenp_gene_symbol),
    size = 3,
    box.padding = unit(3, "lines"),
    point.padding = unit(0, "lines"),
    color = "black",
    segment.color = "black",
    max.overlaps = Inf
  ) +
  geom_text_repel(
    data = subset(south_data, transcript_id %in% extra_gene),
    aes(label = xenp_gene_symbol),
    size = 3,
    box.padding = unit(2.7, "lines"),
    point.padding = unit(0, "lines"),
    color = "black",
    segment.color = "black",
    max.overlaps = Inf
  ) +
  geom_text_repel(
    data = subset(south_data, xenp_gene_symbol %in% extra_gene_2),
    aes(label = xenp_gene_symbol),
    size = 3,
    box.padding = unit(3, "lines"),
    point.padding = unit(0, "lines"),
    color = "black",
    segment.color = "black",
    max.overlaps = Inf
  )

gg_south
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

ggsave("south_volcanoplot.jpeg", width = 7, height = 5 , dpi = 600)
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

Getting information about interesting genes

We filter in the DESeq2 results object those DEGs which are significantly DE.

pull_genes<-function(x, alpha=0.05, lfc=0, signed="both", feature="xenp_gene_symbol") {
  
  # filter by adjusted p
  x<-x %>%
    filter(padj<alpha)
  
  if(signed=="up"){
    x<-x %>%
      filter(log2FoldChange>lfc) %>%
      pull(feature)
  }
  
  if(signed=="down"){
    x<-x %>%
      filter(log2FoldChange<(lfc*-1)) %>%
      pull(feature) 
  }
  
  if(signed=="both"){
    x<-x %>%
      filter(abs(log2FoldChange)>lfc) %>%
      pull(feature) 
  }
  
  ## deal with multiple gene annotations for the same gene (different transcripts)
  
  x<-strsplit(x, ";") %>% unlist() %>% unique()
  x<-x[!is.na(x)]
  
  return(x[!is.na(x)])
}

# now extract all

degs_ids<-lapply(res, FUN=function(x) 
  x %>%
  as_tibble(rownames = "gene_id") %>%
  left_join(xtrop) %>%
    pull_genes(feature = "gene_id", alpha = 0.05, lfc=0, signed = "both")
)
## Joining with `by = join_by(gene_id)`
## Joining with `by = join_by(gene_id)`
str(degs_ids)
## List of 2
##  $ central: chr [1:93] "PECUL23A000127" "PECUL23A000194" "PECUL23A000652" "PECUL23A001362" ...
##  $ south  : chr [1:983] "PECUL23A000011" "PECUL23A000059" "PECUL23A000253" "PECUL23A000300" ...

After that, we join the annotations to those DEGs and arrange them by their lowest adjusted p-value to get more infromation on those DEGs.

extract_info <- function(sig_genes, deseq_result, xtrop) {
  # Convert DESeq results to tibble
  deseq_tibble <- as_tibble(deseq_result, rownames = "gene_id")
  
  # Filter DESeq results for significant DEGs based on gene_id
  filtered_deseq_result <- deseq_tibble %>%
    filter(gene_id %in% sig_genes)
  
  # Perform left join with annotation data
  detailed_info <- filtered_deseq_result %>%
    left_join(xtrop, by = "gene_id")
  
  return(detailed_info)
}
sig_degs_central <- extract_info(degs_ids$central, res$central, xtrop) %>%
    arrange(padj)
sig_degs_south <- extract_info(degs_ids$south, res$south, xtrop) %>%
    arrange(padj)

intersection <- extract_info(intersect(sig_degs_set$Central, sig_degs_set$South), res$south, xtrop)

write.csv(sig_degs_central, file = "sig_degs_central.csv")
write.csv(sig_degs_south, file = "sig_degs_south.csv")

Apoptosis

extract_info_by_keywords(res$central, c("mmp11", "apoptosis"), xtrop)
## # A tibble: 31 × 15
##    gene_id   baseMean log2FoldChange  lfcSE   stat  pvalue    padj transcript_id
##    <chr>        <dbl>          <dbl>  <dbl>  <dbl>   <dbl>   <dbl> <chr>        
##  1 PECUL23A…   2303.          0.577  0.127   4.52  6.09e-6 0.00374 PECUL23A0615…
##  2 PECUL23A…     96.6         1.14   0.384   2.96  3.04e-3 0.204   PECUL23A0095…
##  3 PECUL23A…    212.          0.220  0.113   1.95  5.14e-2 0.652   PECUL23A0382…
##  4 PECUL23A…    367.         -0.123  0.0678 -1.82  6.92e-2 0.702   PECUL23A0177…
##  5 PECUL23A…     43.5         1.82   1.25    1.45  1.46e-1 0.796   PECUL23A0547…
##  6 PECUL23A…   1275.         -0.158  0.122  -1.29  1.97e-1 0.831   PECUL23A0412…
##  7 PECUL23A…     69.5         0.204  0.159   1.28  2.00e-1 0.835   PECUL23A0180…
##  8 PECUL23A…    745.          0.250  0.221   1.13  2.58e-1 0.866   PECUL23A0234…
##  9 PECUL23A…    206.         -0.155  0.148  -1.05  2.93e-1 0.887   PECUL23A0303…
## 10 PECUL23A…    725.         -0.0759 0.0818 -0.928 3.53e-1 0.914   PECUL23A0577…
## # ℹ 21 more rows
## # ℹ 7 more variables: peptide_id <chr>, xenx_pep_id <chr>,
## #   xenx_gene_symbol <chr>, xenx_description <chr>, xenp_pep_id <chr>,
## #   xenp_gene_symbol <chr>, xenp_description <chr>
extract_info_by_keywords(res$south, c("mmp11", "apoptosis"), xtrop)
## # A tibble: 31 × 15
##    gene_id    baseMean log2FoldChange  lfcSE  stat  pvalue    padj transcript_id
##    <chr>         <dbl>          <dbl>  <dbl> <dbl>   <dbl>   <dbl> <chr>        
##  1 PECUL23A0…     43.5          5.03  1.19    4.22 2.49e-5 0.00235 PECUL23A0547…
##  2 PECUL23A0…    160.           0.520 0.163   3.19 1.42e-3 0.0290  PECUL23A0442…
##  3 PECUL23A0…    104.          -0.283 0.125  -2.27 2.33e-2 0.150   PECUL23A0044…
##  4 PECUL23A0…    206.          -0.315 0.148  -2.13 3.33e-2 0.185   PECUL23A0303…
##  5 PECUL23A0…     69.5          0.304 0.158   1.93 5.35e-2 0.239   PECUL23A0180…
##  6 PECUL23A0…   1273.          -0.159 0.0846 -1.88 6.08e-2 0.258   PECUL23A0303…
##  7 PECUL23A0…    202.          -0.231 0.142  -1.63 1.03e-1 0.345   PECUL23A0527…
##  8 PECUL23A0…   2303.           0.202 0.127   1.59 1.12e-1 0.360   PECUL23A0615…
##  9 PECUL23A0…     98.0         -0.256 0.189  -1.35 1.76e-1 0.450   PECUL23A0342…
## 10 PECUL23A0…    220.           0.103 0.0781  1.31 1.89e-1 0.469   PECUL23A0009…
## # ℹ 21 more rows
## # ℹ 7 more variables: peptide_id <chr>, xenx_pep_id <chr>,
## #   xenx_gene_symbol <chr>, xenx_description <chr>, xenp_pep_id <chr>,
## #   xenp_gene_symbol <chr>, xenp_description <chr>

Interactive Volcano Plots

# we can now turn this into an interactive plot:
ggplotly(gg_central, tooltip="text")
## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomTextRepel() has yet to be implemented in plotly.
##   If you'd like to see this geom implemented,
##   Please open an issue with your example code at
##   https://github.com/ropensci/plotly/issues
ggplotly(gg_south, tooltip="text")
## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomTextRepel() has yet to be implemented in plotly.
##   If you'd like to see this geom implemented,
##   Please open an issue with your example code at
##   https://github.com/ropensci/plotly/issues
## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomTextRepel() has yet to be implemented in plotly.
##   If you'd like to see this geom implemented,
##   Please open an issue with your example code at
##   https://github.com/ropensci/plotly/issues
## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomTextRepel() has yet to be implemented in plotly.
##   If you'd like to see this geom implemented,
##   Please open an issue with your example code at
##   https://github.com/ropensci/plotly/issues